// this file is ALWAYS called by another do-file

* * *   * * *   * * *   * * *   * * *   * * *
*   LOAD & PREPARE THE DATA   
* * *   * * *   * * *   * * *   * * *   * * *


// Build the 200K-threshold data in its own frame. A leftover prepdta2 frame
// from an earlier run is dropped first, because frame create aborts when the
// name is already in use. (capture: the drop is skipped silently if the frame
// does not exist or is the current frame.)
capture frame drop prepdta2
frame create prepdta2
frame change prepdta2

// municipality-year records; only the columns used below are kept
use "$mypathRR/Datasets/ESTV/allcant_taxpayers_coll_B.dta", clear
keep snp_gdenr year control n_taxpayers_tot n_taxpayers_ctr reink_ctr  reink_tot cant
duplicates drop
drop if snp_gdenr == .

// Outlier correction: winsorize reink year by year, results stored with suffix _w
	* reink_ctr: cuts 0/99, computed on the control == 1 rows only
	* reink_tot: cuts 0/98, computed on all rows
	winsor2 reink_ctr if control == 1, by(year) suffix(_w) cuts(0 99)
	winsor2 reink_tot, by(year) suffix(_w) cuts(0 98)
	
// Split the municipality data by the control flag into two temporary files
tempfile control treated

* rows with control != 1: n_taxpayers_ctr and reink_ctr are stored under
* *_ctrl names (reink_ctr_w keeps its name)
preserve
	keep if control != 1
	keep snp_gdenr year n_taxpayers_ctr reink_ctr reink_ctr_w
	rename (n_taxpayers_ctr reink_ctr) (n_taxpayers_ctrl reink_ctrl)
	save `control'
restore

* rows with control != 0: variable names stay as they are
preserve
	keep if control != 0
	keep snp_gdenr year n_taxpayers_ctr reink_ctr reink_ctr_w
	save `treated'
restore



* Rebuild one row per municipality-year: start from the control == 0 rows,
* then attach the *_ctr columns (treated tempfile) and the *_ctrl columns
* (control tempfile)
keep if control == 0
drop control n_taxpayers_ctr reink_ctr reink_ctr_w
merge 1:1 snp_gdenr year using `treated'
merge 1:1 snp_gdenr year using `control', nogenerate


* municipality-years without a match in the treated tempfile (_merge == 1)
* get zeros instead of missings
foreach v in n_taxpayers_ctr reink_ctr reink_ctr_w {
	replace `v' = 0 if `v' == . & _merge == 1
}
drop _merge

 // municipality totals: each reink_* value times the matching taxpayer count
generate reink_sum       = reink_tot   * n_taxpayers_tot
generate reink_sum_ctr   = reink_ctr   * n_taxpayers_ctr
generate reink_sum_ctr_w = reink_ctr_w * n_taxpayers_ctr
generate reink_sum_ctrl  = reink_ctrl  * n_taxpayers_ctrl
rename reink_tot reink


// aggregate the municipality totals and taxpayer counts to canton-year cells
local sumvars reink_sum reink_sum_ctr reink_sum_ctr_w reink_sum_ctrl
local nvars   n_taxpayers_tot n_taxpayers_ctr n_taxpayers_ctrl
collapse (sum) `sumvars' `nvars', by(year cant)

sort cant year




// Canton-level dependent variables
* average reink per taxpayer: canton total divided by the matching count
generate reink       = reink_sum       / n_taxpayers_tot
generate reink_ctr   = reink_sum_ctr   / n_taxpayers_ctr
generate reink_ctr_w = reink_sum_ctr_w / n_taxpayers_ctr
generate reink_ctrl  = reink_sum_ctrl  / n_taxpayers_ctrl

* divide by 10 and attach the generic label (more specific labels follow later)
local incvars reink reink_ctr reink_ctr_w reink_ctrl
foreach v of local incvars {
	replace `v' = `v' / 10
	label variable `v' "Net income p.c. (in 1000 CHF)"
}


* natural logs of the rescaled averages
foreach v of local incvars {
	generate ln_`v' = ln(`v')
}




* gen share of high-income taxpayers
* n200K is a copy of n_taxpayers_ctr; the share is expressed in percent of all
* taxpayers. ln() returns missing wherever its argument is 0.
gen n200K = n_taxpayers_ctr
	label var n200K "Number of high-income taxpayers"

gen share200K = n200K/n_taxpayers_tot*100
	label var share200K "Share of high-income taxpayers (in %)"
	
gen ln_n200K = ln(n200K)
	* label fixed: the word "taxpayers" was duplicated
	label var ln_n200K "Log number of high-income taxpayers"
	
gen ln_share200K = ln(share200K)
	label var ln_share200K "Log share of high-income taxpayers (in %)"


* gen labels
* these overwrite the generic "Net income p.c." label set on the reink_* variables
label var ln_reink "Log of net income p.c. (in 1000 CHF)"

label var reink_ctr "Net income p.c. of high-income taxpayers (in 1000 CHF)"
label var ln_reink_ctr "Log of net income p.c. of high-income taxpayers (in 1000 CHF)"
label var reink_ctr_w "Net income p.c. of high-income taxpayers (in 1000 CHF) [top 1% winsorized]"
label var ln_reink_ctr_w "Log of net income p.c. of high-income taxpayers (in 1000 CHF) [top 1% winsorized]"

label var reink_ctrl "Net income p.c. of other taxpayers (in 1000 CHF)"
label var ln_reink_ctrl "Log of net income p.c. of other taxpayers  (in 1000 CHF)"

* label fixed: the word "taxpayers" was duplicated
label var n_taxpayers_ctr "Number of high-income taxpayers"
label var n_taxpayers_ctrl "Number of non-treated taxpayers"
label var n_taxpayers_tot "Total number of taxpayers"



// Treatment indicators
* canton 6 is the treated canton; period flags the years after 2005
generate treated = (cant == 6)
label variable treated "Treated canton"

generate period = (year > 2005 & year != .)
label variable period "Period $ t>2005$"

* difference-in-differences term
generate Interaction = treated * period
label variable Interaction "DiD"


* Event-study dummies before treatment: preN flags the treated canton in
* year 2006 - N (N = 1, ..., 35)
forvalues n = 1/35 {
	local yr = 2006 - `n'
	generate pre`n' = (treated == 1 & year == `yr')
	label variable pre`n' "`yr' (treatment lag `n')"
}

* 2005 is the reference year, so its dummy is switched off everywhere
replace pre1 = 0


* remove dummies for years that do not exist in tax data and / or have no tax
* data for OW; a balanced sample is important for the ES estimation
foreach k of numlist 6 7 8 10(2)20 21 22(2)34 {
	drop pre`k'
}


* Event-study dummies after treatment: postN flags the treated canton in
* year 2005 + N (N = 1, ..., 11)
forvalues n = 1/11 {
	local yr = 2005 + `n'
	generate post`n' = (treated == 1 & year == `yr')
	label variable post`n' "`yr' (treatment lead `n')"
}

* Canton dummies (cant_d1, cant_d2, ...) and their interactions with year
	tabulate cant, generate(cant_d)
	foreach dummy of varlist cant_d* {
		generate trend_`dummy' = `dummy' * year
		label variable trend_`dummy' "Canton specific time trend"
	}
	* common linear trend: a plain copy of year
	generate trend = year


	
	
* * * * define the correct weight variable for each outcome * * * * 

* The helper frames weights1 and weights2 are dropped first if they already
* exist, so that re-running this file in the same session does not abort at
* frame put (into() requires a frame name that is not yet in use).

* get number of taxpayers in pre-treatment period
* (canton means over all years before 2006, rounded to integers)
	capture frame drop weights1
	frame put n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_ctr cant year , into(weights1)
	frame change weights1
		
		keep if year < 2006
		collapse n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_ctr, by(cant)
		foreach var in n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_ctr {
			replace `var' = round(`var')
			rename `var' `var'_pre2005
		}

	frame change prepdta2


* taxpayer counts of the year 2005, one row per canton
	capture frame drop weights2
	frame put n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_ctr cant year , into(weights2)
	frame change weights2

		keep if year == 2005
		drop year
		foreach var in n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_ctr {
			rename `var' `var'_2005
		}
		
	frame change prepdta2

* link back to main frame
* (every canton-year row receives the canton-level *_pre2005 and *_2005 values)
	frlink m:1 cant, frame(weights1) gen(link_weights1)
	frget n_taxpayers_tot_pre2005 n_taxpayers_ctrl_pre2005 n_taxpayers_ctr_pre2005, from(link_weights1)
	
	frlink m:1 cant, frame(weights2) gen(link_weights2)
	frget n_taxpayers_tot_2005 n_taxpayers_ctrl_2005 n_taxpayers_ctr_2005, from(link_weights2)
	
	
* replace with actual taxpayers in pre-treatment period
* (rows before 2005 use their own year's count; 2005 and later keep the 2005 count)
	foreach var in n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_ctr {
	replace `var'_2005 = `var' if year < 2005
	}

* one weight variable per outcome, taken from the matching *_2005 taxpayer count
local w_tot  n200K ln_n200K share200K ln_share200K reink ln_reink
local w_ctrl reink_ctrl ln_reink_ctrl
local w_ctr  reink_ctr ln_reink_ctr reink_ctr_w ln_reink_ctr_w

* outcomes defined over all taxpayers
foreach y of local w_tot {
	generate weight_`y' = n_taxpayers_tot_2005
}

* outcomes of the _ctrl group
foreach y of local w_ctrl {
	generate weight_`y' = n_taxpayers_ctrl_2005
}

* outcomes of the _ctr group
foreach y of local w_ctr {
	generate weight_`y' = n_taxpayers_ctr_2005
}



* * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *
*   TIME TREND ESTIMATION


global outcomes "n200K ln_n200K  share200K ln_share200K  reink ln_reink reink_ctr reink_ctr_w reink_ctrl ln_reink_ctr_w ln_reink_ctr ln_reink_ctrl"

* Estimate a time trend for each  outcome	 
* (the working directory is still switched here: relative paths further down
*  in this file depend on it)
cd "$mypathRR"

* The predicted trends are handed over through a temporary file instead of a
* file in the project data folder, so nothing is left behind if the run stops
* half-way and no clean-up with rm is needed.
tempfile trendfile200
	
* estimate & predict canton trend
* (fitted on the years 1995-2005 with the outcome-specific weight; the linear
*  prediction is computed for every row, including later years)
	preserve
		foreach outcome in $outcomes  {
			reg `outcome' trend_* cant_d* if year < 2006 & year > 1994 [aweight = weight_`outcome']
			predict tr`outcome', xb
			label var tr`outcome' "predicted cantonal time trend in `outcome'"
		}
		
		* tr* also matches trend, trend_cant_d* and treated, which are removed again
		keep  cant year tr*
		drop trend* treated
		bys cant year: keep if _n==1
	save `trendfile200'
	restore 	 
	drop cant_d*

* match trend back to original data
	merge m:1 year cant using `trendfile200', nogen keep(1 3)


* residualize: regress every outcome on its predicted canton trend and keep
* the residuals
foreach y of global outcomes {
	regress `y' tr`y'
	predict resid`y', residuals
	label variable resid`y' "residualized `y' (after taking out canton trend)"
}

frame change default


* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 
// Build the 300K-threshold data in its own frame. A leftover prepdta1 frame
// from an earlier run is dropped first, because frame create aborts when the
// name is already in use (the current frame is default at this point).
capture frame drop prepdta1
frame create prepdta1
frame change prepdta1


// municipality-year records; only the columns used below are kept
use "$mypathRR/Datasets/ESTV/allcant_taxpayers_coll_B.dta", clear
keep snp_gdenr year treat n_taxpayers_tot n_taxpayers_tr reink_tr stbetr_tr  reink_tot stbetr_tot cant
duplicates drop
drop if snp_gdenr == .


// Outlier correction: winsorize reink year by year, results stored with suffix _w
	* reink_tr: cuts 0/99.9, computed on the treat == 1 rows only
	* reink_tot: cuts 0/98, computed on all rows
	winsor2 reink_tr if treat == 1, by(year) suffix(_w) cuts(0 99.9)
	winsor2 reink_tot, by(year) suffix(_w) cuts(0 98)

// Split the municipality data by the treat flag into two temporary files
tempfile control treated

* rows with treat != 1: the *_tr columns are stored under *_ctrl names
* (reink_tr_w keeps its name)
preserve
	keep if treat != 1
	keep snp_gdenr year n_taxpayers_tr reink_tr reink_tr_w stbetr_tr
	rename (n_taxpayers_tr reink_tr stbetr_tr) (n_taxpayers_ctrl reink_ctrl stbetr_ctrl)
	save `control'
restore

* rows with treat != 0: variable names stay as they are
preserve
	keep if treat != 0
	keep snp_gdenr year n_taxpayers_tr reink_tr reink_tr_w stbetr_tr
	save `treated'
restore


* Rebuild one row per municipality-year: start from the treat == 0 rows, then
* attach the *_tr columns (treated tempfile) and the *_ctrl columns (control
* tempfile)
keep if treat == 0
drop treat n_taxpayers_tr reink_tr reink_tr_w stbetr_tr
merge 1:1 snp_gdenr year using `treated'
merge 1:1 snp_gdenr year using `control', nogenerate


* municipality-years without a match in the treated tempfile (_merge == 1)
* get zeros instead of missings
foreach v in n_taxpayers_tr reink_tr reink_tr_w stbetr_tr {
	replace `v' = 0 if `v' == . & _merge == 1
}
drop _merge


 // municipality totals: each value times the matching taxpayer count
* reink
generate reink_sum      = reink_tot   * n_taxpayers_tot
generate reink_sum_w    = reink_tot_w * n_taxpayers_tot
generate reink_sum_tr   = reink_tr    * n_taxpayers_tr
generate reink_sum_tr_w = reink_tr_w  * n_taxpayers_tr
generate reink_sum_ctrl = reink_ctrl  * n_taxpayers_ctrl
rename reink_tot reink

* stbetr
* NOTE(review): the stbetr_sum* variables are not in the varlist of the
* collapse that follows, so they do not survive it - confirm this is intended
generate stbetr_sum      = stbetr_tot  * n_taxpayers_tot
generate stbetr_sum_tr   = stbetr_tr   * n_taxpayers_tr
generate stbetr_sum_ctrl = stbetr_ctrl * n_taxpayers_ctrl
rename stbetr_tot stbetr


// aggregate the municipality totals and taxpayer counts to canton-year cells
local sumvars reink_sum reink_sum_w reink_sum_tr reink_sum_tr_w reink_sum_ctrl
local nvars   n_taxpayers_tot n_taxpayers_tr n_taxpayers_ctrl
collapse (sum) `sumvars' `nvars', by(year cant)

sort cant year



// Add info on number of taxpayers who moved away from canton C to OW
* The movers files are addressed through the mypathRR global, like the tax data
* above, so this step no longer depends on the working directory set by an
* earlier cd.
merge 1:1 cant year using "$mypathRR/Datasets/movers/movers_cant_origin.dta", nogen keepusing(Inmovers Inmovers_cum)
* canton-years without an entry in the movers file count as zero movers
replace Inmovers = 0 if Inmovers == .
replace Inmovers_cum = 0 if Inmovers_cum == .


merge 1:1 cant year using "$mypathRR/Datasets/movers/movers_cant_destination.dta", nogen keepusing(Movers_destin Movers_destin_cum)
replace Movers_destin = 0 if Movers_destin == .
replace Movers_destin_cum = 0 if Movers_destin_cum == .

// generate a corrected number of taxpayers
* variant 1: add the cumulated Inmovers back to the _tr and total counts
gen n_taxpayers_tr_corr = n_taxpayers_tr + Inmovers_cum
gen n_taxpayers_tot_corr = n_taxpayers_tot + Inmovers_cum

* variant 2: additionally subtract the cumulated Movers_destin
* NOTE(review): this uses the yearly Inmovers together with the cumulated
* Movers_destin_cum, while variant 1 uses Inmovers_cum - confirm that mixing
* the yearly and the cumulated series is intended
gen n_taxpayers_tr_corr2 = n_taxpayers_tr + Inmovers - Movers_destin_cum
gen n_taxpayers_tot_corr2 = n_taxpayers_tot + Inmovers - Movers_destin_cum


* average reink per taxpayer in the canton: canton total divided by the
* matching count
generate reink      = reink_sum      / n_taxpayers_tot
generate reink_w    = reink_sum_w    / n_taxpayers_tot
generate reink_tr   = reink_sum_tr   / n_taxpayers_tr
generate reink_tr_w = reink_sum_tr_w / n_taxpayers_tr
generate reink_ctrl = reink_sum_ctrl / n_taxpayers_ctrl

* divide by 10 and attach the generic label (more specific labels follow later)
local incvars reink reink_w reink_tr reink_tr_w reink_ctrl
foreach v of local incvars {
	replace `v' = `v' / 10
	label variable `v' "Net income p.c. (in 1000 CHF)"
}

* natural logs of the rescaled averages
foreach v of local incvars {
	generate ln_`v' = ln(`v')
}

* Counts and shares (in percent) of the taxpayers in the _tr group, plus the
* two mover-corrected shares and the logs
generate n300K = n_taxpayers_tr
generate share300K = n300K / n_taxpayers_tot * 100
generate share300Kc = n_taxpayers_tr_corr / n_taxpayers_tot_corr * 100
generate share300Kc2 = n_taxpayers_tr_corr2 / n_taxpayers_tot_corr2 * 100
generate ln_n300K = ln(n300K)
generate ln_share300K = ln(share300K)

label variable n300K        "Number of rich"
label variable share300K    "Share of rich (in %)"
label variable share300Kc   "Share of rich (in %), corrected"
label variable share300Kc2  "Share of rich (in %), corrected for all movers"
label variable ln_n300K     "Log rich taxpayers"
label variable ln_share300K "Log share of rich (in %)"


* Variable labels for incomes and taxpayer counts
* overall income
label variable ln_reink   "Log of net income p.c. (in 1000 CHF)"
label variable ln_reink_w "Log of net income p.c. (in 1000 CHF)"

* treated group
label variable reink_tr      "Net income p.c. of treated (in 1000 CHF)"
label variable reink_tr_w    "Net income p.c. of treated (in 1000 CHF)"
label variable ln_reink_tr   "Log of net income p.c. of treated (in 1000 CHF)"
label variable ln_reink_tr_w "Log of net income p.c. of treated (in 1000 CHF)"

* non-treated group
label variable reink_ctrl    "Net income p.c. of non-treated (in 1000 CHF)"
label variable ln_reink_ctrl "Log of net income p.c. of non-treated (in 1000 CHF)"

* taxpayer counts
label variable n_taxpayers_tr   "Number of treated taxpayers"
label variable n_taxpayers_ctrl "Number of non-treated taxpayers"
label variable n_taxpayers_tot  "Total number of taxpayers"

* canton total
label variable reink_sum "Total net income (in 1000 CHF)"





// Treatment indicators
* canton 6 is the treated canton; period flags the years after 2005
generate treated = (cant == 6)
label variable treated "Treated"

generate period = (year > 2005 & year != .)
label variable period "Period $ t>2005$"

* difference-in-differences term
generate Interaction = treated * period
label variable Interaction "DiD"


* Event-study dummies before treatment: preN flags the treated canton in
* year 2006 - N (N = 1, ..., 35)
forvalues n = 1/35 {
	local yr = 2006 - `n'
	generate pre`n' = (treated == 1 & year == `yr')
	label variable pre`n' "`yr' (treatment lag `n')"
}

* 2005 is the reference year, so its dummy is switched off everywhere
replace pre1 = 0


* remove dummies for years that do not exist in tax data and / or have no tax
* data for OW; a balanced sample is important for the ES estimation
foreach k of numlist 6 7 8 10(2)20 21 22(2)34 {
	drop pre`k'
}


* Event-study dummies after treatment: postN flags the treated canton in
* year 2005 + N (N = 1, ..., 11)
forvalues n = 1/11 {
	local yr = 2005 + `n'
	generate post`n' = (treated == 1 & year == `yr')
	label variable post`n' "`yr' (treatment lead `n')"
}

* Canton dummies (cant_d1, cant_d2, ...) and their interactions with year
	tabulate cant, generate(cant_d)
	foreach dummy of varlist cant_d* {
		generate trend_`dummy' = `dummy' * year
		label variable trend_`dummy' "Canton specific time trend"
	}
	* common linear trend: a plain copy of year
	generate trend = year


	
	
* * * * define the correct weight variable for each outcome * * * * 

* The helper frames weights3 and weights4 are dropped first if they already
* exist, so that re-running this file in the same session does not abort at
* frame put (into() requires a frame name that is not yet in use).

* get number of taxpayers in pre-treatment period
* (canton means over all years before 2006, rounded to integers)
	capture frame drop weights3
	frame put n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_tr cant year , into(weights3)
	frame change weights3
		
		keep if year < 2006
		collapse n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_tr, by(cant)
		foreach var in n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_tr {
			replace `var' = round(`var')
			rename `var' `var'_pre2005
		}
		
	frame change prepdta1


* taxpayer counts of the year 2005, one row per canton
	capture frame drop weights4
	frame put n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_tr cant year , into(weights4)
	frame change weights4

		keep if year == 2005
		drop year
		foreach var in n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_tr {
			rename `var' `var'_2005
		}
		
	frame change prepdta1


* link back to main frame
* (every canton-year row receives the canton-level *_pre2005 and *_2005 values)
	frlink m:1 cant, frame(weights3) gen(link_weights3)
	frget n_taxpayers_tot_pre2005 n_taxpayers_ctrl_pre2005 n_taxpayers_tr_pre2005, from(link_weights3)
	
	frlink m:1 cant, frame(weights4) gen(link_weights4)
	frget n_taxpayers_tot_2005 n_taxpayers_ctrl_2005 n_taxpayers_tr_2005, from(link_weights4)
	
* replace with actual taxpayers in pre-treatment period
* (rows before 2005 use their own year's count; 2005 and later keep the 2005 count)
	foreach var in n_taxpayers_tot n_taxpayers_ctrl n_taxpayers_tr {
	replace `var'_2005 = `var' if year < 2005
	}
	

* one weight variable per outcome, taken from the matching taxpayer count
local w_tot  n300K ln_n300K share300K ln_share300K reink reink_w ln_reink ln_reink_w
local w_ctrl reink_ctrl ln_reink_ctrl
local w_tr   reink_tr reink_tr_w ln_reink_tr ln_reink_tr_w

* outcomes defined over all taxpayers
foreach y of local w_tot {
	generate weight_`y' = n_taxpayers_tot_2005
}

* outcomes of the _ctrl group
foreach y of local w_ctrl {
	generate weight_`y' = n_taxpayers_ctrl_2005
}

* outcomes of the _tr group
foreach y of local w_tr {
	generate weight_`y' = n_taxpayers_tr_2005
}

* the mover-corrected shares are weighted by the corrected total counts
generate weight_share300Kc = n_taxpayers_tot_corr
generate weight_share300Kc2 = n_taxpayers_tot_corr2


* * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *   * * *
*   TIME TREND ESTIMATION

global outcomes "n300K ln_n300K  share300Kc share300Kc2 share300K ln_share300K  reink reink_w ln_reink ln_reink_w reink_tr reink_tr_w reink_ctrl ln_reink_tr ln_reink_tr_w ln_reink_ctrl"

* Estimate a time trend for each  outcome	 
cd "$mypathRR"

* The predicted trends are handed over through a temporary file instead of a
* file in the project data folder, so nothing is left behind if the run stops
* half-way and no clean-up with rm is needed.
tempfile trendfile300
	
* estimate & predict canton trend
* (fitted on the years 1995-2005 with the outcome-specific weight; the linear
*  prediction is computed for every row, including later years)
	preserve
		foreach outcome in $outcomes  {
			reg `outcome' trend_* cant_d* if year < 2006 & year > 1994 [aweight = weight_`outcome']
			predict tr`outcome', xb
			label var tr`outcome' "cantonal time trend in `outcome'"
		}
		
		* tr* also matches trend, trend_cant_d* and treated, which are removed again
		keep  cant year tr*
		drop trend* treated
		bys cant year: keep if _n==1
	save `trendfile300'
	restore 	 
	drop cant_d*

* match trend back to original data
	merge m:1 year cant using `trendfile300', nogen keep(1 3)

* residualize: regress every outcome on its predicted canton trend and keep
* the residuals
foreach y of global outcomes {
	regress `y' tr`y'
	predict resid`y', residuals
	label variable resid`y' "residualized `y' (after taking out canton trend)"
}



* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *


*   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *


// LINK THE TWO FRAMES
* one-to-one link on canton-year from the current frame to prepdta2
frlink 1:1 year cant, frame(prepdta2 year cant) generate(lnkdta)

* variables copied over from prepdta2 (levels, logs, weights, trends, residuals)
local from200K reink_ctr reink_ctr_w weight_reink_ctr weight_reink_ctr_w trreink_ctr trreink_ctr_w residreink_ctr residreink_ctr_w ///
 ln_reink_ctr ln_reink_ctr_w weight_ln_reink_ctr_w trln_reink_ctr trln_reink_ctr_w residln_reink_ctr residln_reink_ctr_w weight_ln_reink_ctr ///
share200K ln_share200K weight_share200K weight_ln_share200K trshare200K trln_share200K residshare200K residln_share200K

frget `from200K', from(lnkdta)


* any missing value in the copied variables is set to zero
foreach v of local from200K {
	replace `v' = 0 if `v' == .
}
